* Review of Economics and Statistics MS 23551

/*

This Stata(14) program creates all of the tables and figures reported in the texts of:

  - Jin, Miao, Yu-Jane Liu, Juanjuan Meng, and Yu Zhang. "Transmission of Income Variations to Consumption Variations: 
    The Role of the Firm"

Data: We use a dataset on income, consumption, and financials from a leading commercial bank in Asia (henceforth “Bank”). 
    Due to a confidentiality agreement with the Bank, we cannot publicly distribute the data. After getting permission 
    from the Bank, we will provide contact information to anyone who would like to contact it for access to use the data 
    for replication.

Data files for the analysis: 
	reg_yr_main.dta     		: The main credit card sample, aggregated to the annual frequency (baseline sample for the analysis.)
	reg_yr_all.dta 				: The all direct depositor sample, aggregated to the annual frequency.
	reg_mon_main_peereffect.dta	: The main credit card sample, aggregated to the monthly frequency.
	data_monthly.dta    		: The initial dataset without sample restrictions. 
	firmsizeCensus.dta  		: The distribution of firm size in the official business census.

Key regressors: 
	logcon(_r)					: (Residual measures of) Natural logarithm of total period spending at individual level
	logincome(_r)				: (Residual measures of) Natural logarithm of total period earnings at individual level
	log_income_between(_r)		: (Residual measures of) Natural logarithm of average period earnings in a firm.
	log_income_within(_r)		: (Residual measures of) Natural logarithm of total period earnings at individual level relative to natural logarithm of average period earnings in the firm.


Other key variables:
	con							: total period spending at individual level
	income						: total period earnings at individual level
	saving_bal					: period end balance of deposits
	debt_bal					: period end balance of debt, which includes credit card debt and installment loans
	carddebt_accum_2inc			: credit card debt accumulation, normalized by annual labor earnings
	debt_accum_2inc				: total debt accumulation, which includes credit card debt and installment loans, normalized by annual labor earnings
	savcard_accum_2inc			: net asset accumulation, which accounts for deposits and credit card debt, normalized by annual labor earnings
	savdebt_accum_2inc			: net asset accumulation, which includes deposits, credit card debt, and installment loans, normalized by annual labor earnings
	age 						: age of the individual
	gender						: = 1 for female, and 0 otherwise
	marriage 					: = 1 if the individual is married, and 0 otherwise
	education					: education categories, including graduate degree, bachelor’s degree, associate’s degree, high school, middle school and below.
	ocpclass					: job position categories, including blue collar, white collar, and managerial workers.
	firmsize					: number of employees in a firm
	wealth                      : average account wealth over sample period
	wealthtoincome              : the wealth to income ratio (average wealth over average income for the sample period)
	log_income_between_cluster	: the difference between log average earnings of the income cluster and log firm average earnings
	log_income_within_cluster 	: the difference between log earnings and log average earnings of the income cluster
	logpeer_con_lag 			: lagged log average consumption of the income cluster group
	logself_con_lag 			: lagged log monthly consumption of the individual
	logpeer_inc					: log average earnings of the income cluster group
	hdebt_accum 				: inverse hyperbolic sine of monthly debt accumulation, which includes credit card debt and installment loans
	hsaving_accum 				: inverse hyperbolic sine of monthly deposits accumulation
	hpeer_con_lag 				: inverse hyperbolic sine of lagged average consumption of the income cluster group
	hself_con_lag 				: inverse hyperbolic sine of lagged monthly consumption of the individual
	hself_inc					: inverse hyperbolic sine of monthly earnings of the individual
	hpeer_inc 					: inverse hyperbolic sine of average earnings of the income cluster group
    industry                    : industry categories, including agriculture, manufacturing, utilities and public service, commerce, finance, science and education, and healthcare
    region                      : region identifiers
*/


global outputdir "`where you save the results'"
global workdir "`where the test data are placed'"

***************************************
******	Table 1
***************************************  
** Panel A: Summary Statistics

use "$workdir/reg_yr_main", clear

	local varlist con income saving_bal debt_bal age gender marriage education_1-education_5 ocpclass_1-ocpclass_3
	eststo: estpost summarize `varlist', detail  
		esttab using "$outputdir/table1a", cells( "count mean sd  p25 p50 p75") csv noobs plain  

	bysort company_no: keep if _n==1
	eststo: estpost summarize firmsize, detail
		esttab using "$outputdir/table1a", cells( "count mean sd  p25 p50 p75") csv noobs plain append  

** Panel B: Variances of the Between and Within Components of Income (in Levels and in Growth)

use "$workdir/reg_yr_main", clear

	xtset id year

	capture matrix drop LC
	matrix LC = J(6,2,.)
	sum logincome,detail
	matrix LC[1, 1] = r(Var)
	matrix LC[1, 2] = r(skewness)
	sum log_income_between,detail
	matrix LC[2, 1] = r(Var)
	matrix LC[2, 2] = r(skewness)
	sum log_income_within,detail
	matrix LC[3, 1] = r(Var)
	matrix LC[3, 2] = r(skewness)

	sum S.logincome,detail
	matrix LC[4, 1] = r(Var)
	matrix LC[4, 2] = r(skewness)
	sum S.log_income_between,detail
	matrix LC[5, 1] = r(Var)
	matrix LC[5, 2] = r(skewness)
	sum S.log_income_within,detail
	matrix LC[6, 1] = r(Var)
	matrix LC[6, 2] = r(skewness)

	matrix list LC
	putexcel A1=matrix(LC) using "$outputdir/table1b",replace
	matrix drop LC

***************************************
******	Table 2 And Figure 2
***************************************  
** Difference in Consumption Transmission


use "$workdir/reg_yr_main", clear

	xtset id year

	reg S.logcon S.logincome,vce(cluster id)   
		outreg2 using "$outputdir/table2panela.xls", dec(3) cttop(mainraw)
	reg S.logcon_r S.logincome_r  ,vce(cluster id)   
		outreg2 using "$outputdir/table2panela.xls", dec(3) cttop(mainres) append
	reg S.logcon S.logincome S.log_income_within ,vce(cluster id) // Transformed regression (see Equation 5)
		outreg2 using "$outputdir/table2panelb.xls", dec(3) cttop(mainraw) // Baseline result using the main credit card sample. Displayed in Figure 2
	reg S.logcon_r S.logincome_r S.log_income_within_r  ,vce(cluster id)   
		outreg2 using "$outputdir/table2panelb.xls", dec(3) cttop(mainres) append

use "$workdir/reg_yr_all", clear

	xtset id year

	reg S.logcon S.logincome  ,vce(cluster id)  
		outreg2 using "$outputdir/table2panela.xls", dec(3) cttop(allraw) append
	reg S.logcon_r S.logincome_r  ,vce(cluster id)   
		outreg2 using "$outputdir/table2panela.xls", dec(3) cttop(allres) append
	reg S.logcon S.logincome S.log_income_within ,vce(cluster id)   
		outreg2 using "$outputdir/table2panelb.xls", dec(3) cttop(allraw) append
	reg S.logcon_r S.logincome_r S.log_income_within_r  ,vce(cluster id)   
		outreg2 using "$outputdir/table2panelb.xls", dec(3) cttop(allres) append


***************************************
******	Table 3
***************************************  
** Difference In Debt And Asset Transmission

use "$workdir/reg_yr_main", clear

	xtset id year

	reg S.carddebt_accum_2inc S.logincome S.log_income_within,vce(cluster id)   
		outreg2 using "$outputdir/table3.xls", dec(3) cttop(carddebt) 
	reg S.debt_accum_2inc S.logincome S.log_income_within,vce(cluster id) 
		outreg2 using "$outputdir/table3.xls", dec(3) cttop(debt) append
	reg S.savcard_accum_2inc S.logincome S.log_income_within,vce(cluster id)  
		outreg2 using "$outputdir/table3.xls", dec(3) cttop(savcard) append
	reg S.savdebt_accum_2inc S.logincome S.log_income_within,vce(cluster id)  
		outreg2 using "$outputdir/table3.xls", dec(3) cttop(savdebt) append
		

***************************************
******	Table 4
***************************************  
** Different Persistence

use "$workdir/reg_yr_main", clear

	xtset id year

	reg log_income_within L.log_income_within, vce(cluster id)
		outreg2 using "$outputdir/table4.xls", dec(3) cttop(within) 
	reg log_income_between L.log_income_between  , vce(cluster id)     
		outreg2 using "$outputdir/table4.xls", dec(3) cttop(between) append

	gen L_log_income_within = L.log_income_within
	gen L_log_income_between = L.log_income_between
	reg log_income_within L_log_income_within 
		est store within
	reg log_income_between L_log_income_between
		est store between
	suest within between, vce(cluster id)
		test [within_mean]L_log_income_within=[between_mean]L_log_income_between

***************************************
******	Table 5
***************************************  
** Self-Insurance and the Variances of the Between and Within Components of Income Growth

** Panel A
use "$workdir/reg_yr_main", clear

	xtset id year

	quietly sum wealth, detail
	gen highwealth = wealth > r(p50)  &  wealth < .
	reg S.logcon S.logincome cS.logincome#c.highwealth, vce(cluster id) 
		outreg2 using "$outputdir/table5a.xls", dec(3) cttop(highwealth)

	quietly sum wealthtoincome, detail
	gen highwealthtoincome = wealthtoincome > r(p50)  &  wealthtoincome < .
	reg S.logcon S.logincome cS.logincome#c.highwealthtoincome, vce(cluster id)  
		outreg2 using "$outputdir/table5a.xls", dec(3) cttop(highwealthtoincome) append

** Panel B

	capture matrix drop WSS
	matrix WSS = J(3,5,.)

	sum S.log_income_between
	matrix WSS[1, 1] = r(Var)
	sum S.log_income_between if highwealth==0
	matrix WSS[1, 2] = r(Var)
	sum S.log_income_between if highwealth==1
	matrix WSS[1, 3] = r(Var)

	sum S.log_income_within
	matrix WSS[3, 1] = r(Var)
	sum S.log_income_within if highwealth==0
	matrix WSS[3, 2] = r(Var)
	sum S.log_income_within if highwealth==1
	matrix WSS[3, 3] = r(Var)

	sum S.log_income_between if highwealthtoincome==0
	matrix WSS[1, 4] = r(Var)
	sum S.log_income_between if highwealthtoincome==1
	matrix WSS[1, 5] = r(Var)

	sum S.log_income_within if highwealthtoincome==0
	matrix WSS[3, 4] = r(Var)
	sum S.log_income_within if highwealthtoincome==1
	matrix WSS[3, 5] = r(Var)

	matrix list WSS
	putexcel A1=matrix(WSS) using "$outputdir/table5b"

	
***************************************
******	Table 6
***************************************  
** Difference in Consumption Transmission: Within-Firm Clusters

use "$workdir/reg_yr_main", clear

	xtset id year

	reg S.logcon S.logincome S.log_income_between_cluster S.log_income_within_cluster, vce(cluster id)   
	test S.log_income_between_cluster=S.log_income_within_cluster
	local rp1 = r(p)
		outreg2 using "$outputdir/table6.xls", dec(3) adds(Prob>F b2_b3, `rp1')


***************************************
******	Table 7
***************************************  
** Consumption Peer Effects inside Within-Firm Clusters

use "$workdir/reg_mon_main_peereffect", clear

	reghdfe logcon logpeer_con_lag							  , a(i.id i.cls##i.company_no i.cls##i.yyyymm i.company_no##i.yyyymm) vce(cluster id)  
		outreg2 using "$outputdir/table7.xls", dec(3) cttop(peereffect1)  
	reghdfe logcon logpeer_con_lag logself_con_lag				  , a(i.id i.cls##i.company_no i.cls##i.yyyymm i.company_no##i.yyyymm) vce(cluster id) 
		outreg2 using "$outputdir/table7.xls", dec(3) cttop(peereffect2) append 
	reghdfe logcon logpeer_con_lag logself_con_lag logincome logpeer_inc, a(i.id i.cls##i.company_no i.cls##i.yyyymm i.company_no##i.yyyymm) vce(cluster id) 
		outreg2 using "$outputdir/table7.xls", dec(3) cttop(peereffect3) append 

	reghdfe hdebt_accum   hpeer_con_lag hself_con_lag hself_inc hpeer_inc, a(i.id i.cls##i.company_no i.cls##i.yyyymm i.company_no##i.yyyymm) vce(cluster id)   
		outreg2 using "$outputdir/table7.xls", dec(3) cttop(peereffect4) append 
	reghdfe hsaving_accum hpeer_con_lag hself_con_lag hself_inc hpeer_inc, a(i.id i.cls##i.company_no i.cls##i.yyyymm i.company_no##i.yyyymm) vce(cluster id) 
		outreg2 using "$outputdir/table7.xls", dec(3) cttop(peereffect5) append 


***************************************
******	Table 8
***************************************  
** Joint Tests of Channels: Income Persistence, Self-insurance, and Peer Effect


use "$workdir/reg_yr_main", clear

	bysort industry region: gen count_industryregion = 1 if _n==1
	gen industryregion = sum(count_industryregion) // a way of generating consistent industryregion identifiers

	bysort industryregion: gen Nworkers = _N/2 // weights in the joint test of channels

	** Y -- diff: Strength of the consumption transmission difference (between-firm v.s. within-firm)

	xtset id year

	levelsof industryregion, local(industryregion_list)

	gen diff = . // To be genenrated
	foreach i of local industryregion_list {
		cap reg S.logcon S.logincome S.log_income_within if industryregion==`i', vce(cluster id)
		if _rc==0 {
			replace diff=_b[S.log_income_within] if industryregion==`i'  
			}
		}

	** X1 -- persis_diff: Measure for the strength of the income persistence channel

	xtset id year   

	gen persis_b=.
	gen persis_w=.
	foreach i of local industryregion_list {
		cap reg log_income_within L.log_income_within  if  industryregion==`i', vce(cluster id)
		if _rc==0  {
			replace persis_w=_b[L.log_income_within] if industryregion==`i'  
			}
		cap reg log_income_between L.log_income_between if industryregion==`i', vce(cluster id)
		if _rc==0  {
			replace persis_b=_b[L.log_income_between] if industryregion==`i'
			}
		}
	gen persis_diff=persis_b-persis_w

	** X2 -- varianceratio: Measure for the strength of the self-insurance channel 

	xtset id year

	gen var_bt_low=.
	gen var_bt_high=.
	gen var_wt_low=.
	gen var_wt_high=.
	foreach i of local industryregion_list {
		qui sum wealthtoincome if industryregion==`i', detail
		local median_w2y_i = r(p50)
		qui sum S.log_income_between if wealthtoincome < `median_w2y_i' & industryregion==`i'
		replace var_bt_low = r(Var)  if industryregion==`i'
		qui sum S.log_income_between if wealthtoincome >=`median_w2y_i' & industryregion==`i'
		replace var_bt_high= r(Var)  if industryregion==`i'
		qui sum S.log_income_within  if wealthtoincome < `median_w2y_i' & industryregion==`i'
		replace var_wt_low = r(Var)  if industryregion==`i'
		qui sum S.log_income_within  if wealthtoincome >=`median_w2y_i' & industryregion==`i'
		replace var_wt_high= r(Var)  if industryregion==`i'
		}
	gen varianceratio=(var_wt_high/var_wt_low)/(var_bt_high/var_bt_low)  

	keep industry region industryregion diff persis_diff varianceratio Nworkers
	bysort industryregion: keep if _n==1

	** X3 -- peeravg: Measure for the strength of the peer effect channel

	preserve
		use "$workdir/reg_mon_main_peereffect",clear
		bysort industry region: gen count_industryregion = 1 if _n==1
		gen industryregion = sum(count_industryregion) // a way of generating consistent industryregion identifiers
		egen id_continuous = group(id) // handles a large amount of ids
		
		gen regcoef = .
		gen regpval = .
		gen peercoef = .
		qui sum id_continuous
		forvalues j = 1/`r(max)' {
			cap reg logcon logpeer_con_lag  logself_con_lag  logincome logpeer_inc  if id_continuous==`j', robust
			if _rc==0   {
				replace regcoef = _b[logpeer_con_lag] if id_continuous==`j'
				replace regpval = 2 * ttail( 19 , abs(_b[logpeer_con_lag]/_se[logpeer_con_lag]) ) if id_continuous==`j'
				replace peercoef = regcoef if  (regcoef > 0 & regpval < 0.1) & id_continuous==`j' // peercoef = the reg coef if it is significantly positive at the 10% confidence level, 
				replace peercoef = 0       if !(regcoef > 0 & regpval < 0.1) & id_continuous==`j' // and zero otherwise
			}
		}

		bysort id_continuous: keep if _n==1
		bysort industryregion: egen peeravg = mean(peercoef)
		by     industryregion: keep if _n==1
		save "$workdir/industryregion_peeravg", replace
	restore

	merge 1:1 industryregion using "$workdir/industryregion_peeravg"
	drop _merge
	keep industry region industryregion diff persis_diff varianceratio peeravg Nworkers
	egen persisdiff_sd=std(persis_diff)
	egen varianceratio_sd=std(varianceratio)
	egen peeravg_sd=std(peeravg)
	save "$workdir/jointtest", replace

	** Regressions for the joint test of channels 

	reg diff persisdiff_sd                             [aw = Nworkers], vce(robust)   
		outreg2 using "$outputdir/table8.xls", dec(3) cttop(1)   
	reg diff               varianceratio_sd            [aw = Nworkers], vce(robust)  
		outreg2 using "$outputdir/table8.xls", dec(3) cttop(2) append  
	reg diff                                peeravg_sd [aw = Nworkers], vce(robust)  
		outreg2 using "$outputdir/table8.xls", dec(3) cttop(3) append  
	reg diff persisdiff_sd varianceratio_sd peeravg_sd [aw = Nworkers], vce(robust) 
		outreg2 using "$outputdir/table8.xls", dec(3) cttop(4) append  


***************************************
******	Figure 1
***************************************  
** Distribution of firm size

use "$workdir/data_monthly", clear // the initial dataset (without sample restrictions)
	
	bysort company_no: keep if _n==1 // keep one obs for each firm

	gen firmsizegroup=1 if firmsize<5  
	replace firmsizegroup=2 if firmsize>=5 & firmsize<=29
	replace firmsizegroup=3 if firmsize>=30 & firmsize<=49
	replace firmsizegroup=4 if firmsize>=50 & firmsize<=199
	replace firmsizegroup=5 if firmsize>=200 & firmsize<=499
	replace firmsizegroup=6 if firmsize>=500
	bysort firmsizegroup: gen numoffirms=_N
	keep firmsizegroup numoffirms
	duplicates drop
	egen totnumoffirms=total(numoffirms)
	gen firmsizedensityBankData = numoffirms/totnumoffirms

	merge 1:1 firmsizegroup using "$workdir/firmsizeCensus.dta"
	twoway connected firmsizedensityBankData firmsizedensityCensusData firmsizegroup,  ///
		lpattern(solid dash) legend(cols(1) label(1 "Density in the Bank's Sample") label(2 "Density in the Census"))  ///
		xtitle("Firm Size") ytitle("Density") xlabel(1 "Less than 5" 2 "5 to 29" 3 "30 to 49" 4 "50 to 199" 5 "200 to 499" 6 "More than 500")  ///
		graphr(color(white)) graphregion(margin(vlarge))
		graph export "$outputdir/figure1.png", as(png) 
